#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2025/9/21 22:17
# @Author  : Dell
# @File    : doucment_split.py
# @Software: PyCharm
# @Desc    : Text-splitting demo using LangChain's RecursiveCharacterTextSplitter, see https://python.langchain.com
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Read the whole source text into memory. The file is decoded as GBK, and
# errors='ignore' silently drops any bytes that are not valid GBK.
with open('xiyouji.txt', 'r', encoding='gbk', errors='ignore') as f:
    pg_work = f.read()

# The entire file is wrapped as a single input document, so this reports 1.
print(f"你有{len([pg_work])}个文档")

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,  # size of each chunk
    chunk_overlap=20,  # length of the overlap between chunks
)

# Split the text into chunk documents according to the configured sizes.
texts = text_splitter.create_documents([pg_work])
print(f"你有{len(texts)}个文档")

# Preview the first two chunks. The guards avoid an IndexError when the file
# is empty or short enough to produce fewer than two chunks.
if texts:
    # '\n' (not the literal '/n') leaves a blank line after the first chunk.
    print(texts[0].page_content, '\n')
if len(texts) > 1:
    print(texts[1].page_content)